Load Libraries and functions

#setwd("/Volumes/HAVRANEK18/Lab_testing_flasks")  
library(ggplot2)
library (plotly)
library (lubridate)
library(tidyverse)
library(grid)
library(hms)
library(fpp2)
library(zoo)

#derivative function 
deriv <- function(x, y) {
  diffvar <- diff(y) / diff(x)
}

peaks.read <- function(filepath){
    files <-  list.files(path=filepath, recursive = T,full.names = T) 
    #lists the file names in the filepath directory
    tables <- lapply(files, read.table,header = T, stringsAsFactors = F) 
    #reads in the files
    df <- do.call("rbind",tables) #combines all the tables into one dataframe
    df2 <- df[,c(1:2,17:19)] #subsets the dataframe to include only necessary columns
    df3 <- df2[complete.cases(df2),] #subsets the dataframe to include only 
    #complete cases, eliminating rows at the end that aren't complete
    return(df3)
    } #peaks.read is a function that will read files from all sub-folders of the
    #directory named in filepath and combine them into one dataframe

h2o_slope <- function(data,window){
  H2O_Slope_Window <- numeric()
  for (i in 1:nrow(data)){    
    if(i<=window/2){                                                                   
      H2O_Slope_Window[i] = 
        as.numeric(coef(lm(data$H2O[1:(i+window/2)]~
        data$seconds[1:(i+window/2)]))[2])
    }else{
      if(i >= (nrow(data)-window/2)){
        H2O_Slope_Window[i] = 
          as.numeric(coef(lm(data$H2O[(i-window/2):nrow(data)]~
        data$seconds[(i-window/2):nrow(data)]))[2])
      }else{
        H2O_Slope_Window[i] = 
          as.numeric(coef(lm(data$H2O[(i-window/2):(i+window/2)]~
        data$seconds[(i-window/2):(i+window/2)]))[2])       
      }
    }
  }
  return(H2O_Slope_Window)
} #h2o_slope will calculate the slope of h2o vs. experiment second in a moving window  

Load Data - This is almost assuredly broken b/c I moved the RDS files. I will need to update this so the files read from the flash drive, and then saves into the data files

# folder_110418 <- "/Volumes/HAVRANEK18/110418"
# data_110418 <- peaks.read(folder_110418) 
# saveRDS(data_110418, "data_110418.RDS")
data_110418 <- readRDS("/Volumes/HAVRANEK18/Lab_testing_flasks/data/data_110418.RDS")

# I did a lot of work between 5 - 10 pm so I need to bring in the data from the fifth b/c the picarro uses GMT time 

# folder_110518 <- "/Volumes/HAVRANEK18/110518"
# data_110518 <- peaks.read(folder_110518) 
# saveRDS(data_110518, "data_110518.RDS")
data_110518 <- readRDS("/Volumes/HAVRANEK18/Lab_testing_flasks/data/data_110518.RDS")

1. Use Lubridate to match Picarro time with lab Notes

2. Parse the data down to when I was running experiments, and plot that water concentration data

data_110418 <- data_110418 %>% mutate(
  MST = 
    paste(data_110418$DATE, data_110418$TIME) %>% 
    ymd_hms() %>% 
    with_tz(tzone=c("America/Denver"))
)

data_110518 <- data_110518 %>% mutate(
  MST = 
    paste(data_110518$DATE, data_110518$TIME) %>% 
    ymd_hms() %>% 
    with_tz(tzone=c("America/Denver"))
)

evening_110418 <- data_110518 %>% 
  filter( MST < "2018-11-04 23:00:00")

data_110418 <- bind_rows(data_110418, evening_110418)

all_110418 <- ggplot(data_110418, aes(MST, H2O))+
  labs(x="Time (MST)", title="November 04, 2018")+
  geom_point()+
  theme_classic()

all_110418

Plot the filling stage with grob labels

fill_110418 <- evening_110418 %>% 
  filter( MST < "2018-11-04 20:00:00")

#These labels help me visually in my plots 
label1 <- grobTree(textGrob("Bottle 16", x=0.1,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label2 <- grobTree(textGrob("Bottle 15", x=0.35,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label3 <- grobTree(textGrob("Bottle 14", x=0.55,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label4 <- grobTree(textGrob("Bottle 2", x=0.8,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

label5 <- grobTree(textGrob("Jumper", x=0.9,  y=0.95, hjust=0,
  gp=gpar(col="red", fontsize=13, fontface="italic")))

fill_concen_110418 <- ggplot(fill_110418, aes(MST, H2O))+
  labs(x="Time (MST)", title="November 03, 2018")+
  geom_point()+
  theme_classic()+
  annotation_custom(label1)+
  annotation_custom(label2)+
  annotation_custom(label3)+
  annotation_custom(label4)+
  annotation_custom(label5)

print(fill_concen_110418)

Fill flasks with water vapor, and reduce the input values

fill_16_110418 <- filter(evening_110418, MST > "2018-11-04 17:27:00" & MST < "2018-11-04 17:37:00")
df16 <- data.frame(
  "valco_position" = 16,
  "mean_d18O" = mean (fill_16_110418$Delta_18_16),
  "sd_d18O" = sd (fill_16_110418$Delta_18_16),
  "mean_dD" = mean(fill_16_110418$Delta_D_H),
  "sd_dD" = sd(fill_16_110418$Delta_D_H),
  stringsAsFactors = FALSE
)

fill_15_110418 <- filter (evening_110418, MST > "2018-11-04 18:08:00" & MST < "2018-11-04 18:18:00")
df15 <- data.frame(
  "valco_position" = 15,
  "mean_d18O" = mean (fill_15_110418$Delta_18_16),
  "sd_d18O" = sd (fill_15_110418$Delta_18_16),
  "mean_dD" = mean(fill_15_110418$Delta_D_H),
  "sd_dD" = sd(fill_15_110418$Delta_D_H),
  stringsAsFactors = FALSE
)

fill_14_110418 <- filter (evening_110418, MST > "2018-11-04 18:48:00" & MST < "2018-11-04 18:58:00")
df14 <- data.frame(
  "valco_position" = 14,
  "mean_d18O" = mean (fill_14_110418$Delta_18_16),
  "sd_d18O" = sd (fill_14_110418$Delta_18_16),
  "mean_dD" = mean(fill_14_110418$Delta_D_H),
  "sd_dD" = sd(fill_14_110418$Delta_D_H),
  stringsAsFactors = FALSE
)

fill_2_110418 <- filter (evening_110418, MST > "2018-11-04 19:37:00" & MST < "2018-11-04 19:47:00")
df2 <- data.frame(
  "valco_position" = 2,
  "mean_d18O" = mean (fill_2_110418$Delta_18_16),
  "sd_d18O" = sd (fill_2_110418$Delta_18_16),
  "mean_dD" = mean(fill_2_110418$Delta_D_H),
  "sd_dD" = sd(fill_2_110418$Delta_D_H),
  stringsAsFactors = FALSE
)

fill_jumper_110418 <- filter (evening_110418, MST > "2018-11-04 19:50:00" & MST < "2018-11-04 19:59:00")
df1 <- data.frame(
  "valco_position" = 1,
  "mean_d18O" = mean (fill_jumper_110418$Delta_18_16),
  "sd_d18O" = sd (fill_jumper_110418$Delta_18_16),
  "mean_dD" = mean(fill_jumper_110418$Delta_D_H),
  "sd_dD" = sd(fill_jumper_110418$Delta_D_H),
  stringsAsFactors = FALSE
)

fill_110418_summary <-  bind_rows(df16, df15, df14, df2, df1)

Pull out results data and add in time averaging

Out_110418 <- evening_110418 %>% 
  filter( MST >"2018-11-04 20:00:00" & MST < "2018-11-04 22:00:00")

Out_110418$seconds <- as.numeric(row.names(Out_110418))
 
Out_110418 <- Out_110418 %>% 
  mutate(
  m_11= rollmean(H2O, k=11, fill=NA),
  m_25=rollmean(H2O, k=25, fill=NA)) %>% 
  subset(seconds >13 & seconds<(nrow(Out_110418)-13))

Calulate the first derivative and second derivative of water concentration data, then plot the second derivative

#Initialize data frame
Out_110418_fd<-as.data.frame(1:(nrow(Out_110418)-1))

# mutate in the first derivatives, and the rolling first derivatives 
Out_110418_fd<- Out_110418_fd %>% 
  mutate(
    fd = deriv(Out_110418$seconds, Out_110418$H2O),
    fd11=deriv(Out_110418$seconds, Out_110418$m_11),
    fd25=deriv(Out_110418$seconds, Out_110418$m_25)
  )

#Force column names so I can do the second derivative 
colnames(Out_110418_fd, do.NULL = FALSE)
## [1] "1:(nrow(Out_110418) - 1)" "fd"                      
## [3] "fd11"                     "fd25"
colnames(Out_110418_fd)<-c("seconds", "fd", "fd11", "fd25")

#Initialize second derivative data frame
Out_110418_sd <- as.data.frame(1:(nrow(Out_110418_fd)-1))

#Mutate in the second derivatives 
Out_110418_sd <- Out_110418_sd %>% 
  mutate(
    sd=deriv(Out_110418_fd$seconds, Out_110418_fd$fd),
    sd11=deriv(Out_110418_fd$seconds, Out_110418_fd$fd11),
    sd25=deriv(Out_110418_fd$seconds, Out_110418_fd$fd25)
  )

#Force column names of the second derivative df 
colnames(Out_110418_sd, do.NULL = FALSE)
## [1] "1:(nrow(Out_110418_fd) - 1)" "sd"                         
## [3] "sd11"                        "sd25"
colnames(Out_110418_sd)<-c("seconds", "sd", "sd11", "sd25")

sd_110418_plot<-ggplot(Out_110418_sd)+
  geom_point(aes(seconds, sd),colour = "green")+
  geom_point(aes(seconds, sd11),colour = "blue")+
  geom_point(aes(seconds, sd25), colour = "red")+
  annotation_custom(label1)+
  annotation_custom(label2)+
  annotation_custom(label3)+
  annotation_custom(label4)+
  scale_y_continuous(limits = c(-5, 5))+
  labs(x="Seconds", title="second derivative")+
  theme_classic()

print(sd_110418_plot)
## Warning: Removed 4117 rows containing missing values (geom_point).
## Warning: Removed 475 rows containing missing values (geom_point).
## Warning: Removed 502 rows containing missing values (geom_point).

ggplotly(sd_110418_plot)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCustomAnn() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

From this chunk, determine the seconds (in the Out_110418 data frome) where the second derivative of water concentration is approximately 0. Use the first 300 seconds (5 minutes) where the system is draining at steady state. Then feed that into the data frames in the next chunk

Calculate results using times when the second derivative of water concentration ~0

#Create a data frame for each bottle 
results_110418_16 <- filter(Out_110418, seconds > 900 & seconds < 1200)
results_110418_15 <- filter (Out_110418, seconds > 2750 & seconds < 3050)
results_110418_14 <- filter (Out_110418, seconds > 4600 & seconds < 4900)
results_110418_2 <- filter (Out_110418, seconds > 6500 & seconds < 6800)
results_110418_1 <- filter (Out_110418, seconds > 7874 & seconds < 8400)

#Create a data frame for each bottle and then smoosh the dfs together
R16_df <- data.frame(
  "valco_position" = 16,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_16$Delta_18_16),
  "sd_d18O" = sd (results_110418_16$Delta_18_16),
  "mean_dD" = mean(results_110418_16$Delta_D_H),
  "sd_dD" = sd(results_110418_16$Delta_D_H),
  stringsAsFactors = FALSE
)

R15_df <- data.frame(
  "valco_position" = 15,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_15$Delta_18_16),
  "sd_d18O" = sd (results_110418_15$Delta_18_16),
  "mean_dD" = mean(results_110418_15$Delta_D_H),
  "sd_dD" = sd(results_110418_15$Delta_D_H),
  stringsAsFactors = FALSE
)

R14_df <- data.frame(
  "valco_position" = 14,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_14$Delta_18_16),
  "sd_d18O" = sd (results_110418_14$Delta_18_16),
  "mean_dD" = mean(results_110418_14$Delta_D_H),
  "sd_dD" = sd(results_110418_14$Delta_D_H),
  stringsAsFactors = FALSE
)

R2_df <- data.frame(
  "valco_position" = 2,
  "Time averaged" = 5,
  "Type" = "result",
  "mean_d18O" = mean (results_110418_2$Delta_18_16),
  "sd_d18O" = sd (results_110418_2$Delta_18_16),
  "mean_dD" = mean(results_110418_2$Delta_D_H),
  "sd_dD" = sd(results_110418_2$Delta_D_H),
  stringsAsFactors = FALSE
)

R1_df<- data.frame(
  "valco_position" = 1,
  "Time averaged" = 8.76,
  "Type" = "Correction",
  "mean_d18O" = mean (results_110418_1$Delta_18_16),
  "sd_d18O" = sd (results_110418_1$Delta_18_16),
  "mean_dD" = mean(results_110418_1 $Delta_D_H),
  "sd_dD" = sd(results_110418_1 $Delta_D_H),
  stringsAsFactors = FALSE
)

results_110418_summary <- bind_rows(R16_df, R15_df, R14_df, R2_df, R1_df)

Bind together input and results, write to csv

Here I need to change this so that it saves to the data_output folders

Summary_110418 <- bind_rows(fill_110418_summary, results_110418_summary)
print(Summary_110418)
##    valco_position mean_d18O   sd_d18O   mean_dD     sd_dD Time.averaged
## 1              16 -24.95446 0.2005179 -185.9307 0.6231383            NA
## 2              15 -24.99653 0.2035923 -186.3609 0.6309402            NA
## 3              14 -24.66604 0.2148824 -186.2368 0.6341125            NA
## 4               2 -24.68222 0.2019680 -186.1628 0.6037736            NA
## 5               1 -24.86418 0.2346139 -186.2695 0.6054040            NA
## 6              16 -24.95140 0.2352566 -185.5214 0.6632035          5.00
## 7              15 -25.21145 0.2136877 -185.5911 0.6457631          5.00
## 8              14 -24.97166 0.2326514 -185.1155 0.7057297          5.00
## 9               2 -25.03506 0.2178029 -185.9696 0.7176271          5.00
## 10              1 -23.16637 0.4698788 -186.0463 1.7536300          8.76
##          Type
## 1        <NA>
## 2        <NA>
## 3        <NA>
## 4        <NA>
## 5        <NA>
## 6      result
## 7      result
## 8      result
## 9      result
## 10 Correction
#write.csv(Summary_110418, "November 04, 2018 Summary table.csv")